Import Packages

# Load packages
library(tidyverse)
library(here)
library(ggplot2)
library(zoo)
library(dplyr)
library(patchwork)
library(mgcv)
library(ggpubr)
library(rstatix)
library(readr)
library(gridExtra)
library(rmcorr) 

# Set working directory
mds_dir <- here::here()

Defining Functions to Clean & Combine Data

# Remove rows for which Heart Rate is 0
remove_zero_heart_rate <- function(data) {
  data %>% 
    filter(`Heart Rate` != 0)
}

# Add Time column (1000 rows = 1 second)
add_time_column <- function(data) {
  data %>%
    group_by(Id) %>%
    mutate(Time = (row_number() - 1) / 1000) %>%  # Convert row number to seconds
    ungroup()
}

# Change 4 and 8 events which are repeated to 0 (i.e. If event column has 4, 4, 4 -> 4, 0, 0)
clean_repeated_events <- function(data) {
  data %>%
    arrange(Id, Time) %>% 
    group_by(Id) %>%
    mutate(
      # Track time differences and event changes
      time_diff = Time - lag(Time, default = -Inf),
      event_changed = EVENT != lag(EVENT, default = first(EVENT)),
      
      # Identify new clusters of 4/8
      cluster = cumsum(
        (EVENT %in% c(4, 8) & (time_diff > 0.5 | event_changed)) |
          !(EVENT %in% c(4, 8))
      )
    ) %>%
    group_by(Id, cluster) %>%
    mutate(
      # Replace repeats with 0 within each cluster
      EVENT = ifelse(
        EVENT %in% c(4, 8) & row_number() > 1,
        0,
        EVENT
      )
    ) %>%
    ungroup() %>%
    select(-time_diff, -event_changed, -cluster)  # Remove helper columns
}

clean_consecutive_rr <- function(data) {
  data %>%
    group_by(Id) %>%
    arrange(Time) %>%
    mutate(
      `Clean R-R` = if_else(
        row_number() == 1 | `ECG R-R` != lag(`ECG R-R`),
        `ECG R-R`,
        NA_real_
      )
    ) %>%
    ungroup()
}

Load and Process All Subject Data

# Import data
import_subjects_exp <- function(data_dir = "C:\\Users\\CAyre\\Documents\\Coding\\deepdream-analysis\\pre-study\\ECG\\ACQ") {
  # Get list of all subject files (e.g., ID01_baseline.csv, ID02_baseline.csv)
  file_list <- list.files(
    path = here(data_dir),
    pattern = "\\d+_experiment\\.csv",
    full.names = TRUE
  )

  # Read and combine all files
  data <- map_df(file_list, ~ {
    read_delim(.x, 
              delim = ";",
              col_types = cols(.default = col_character()),
              show_col_types = FALSE) %>%
      mutate(Id = as.integer(str_extract(basename(.x), "\\d+"))) %>%  # Extract ID
      type_convert()  # Automatically convert columns to appropriate types
  })

  # Arrange by subject ID
  data <- data %>% arrange(Id)

  return(data)
}

import_subjects_base <- function(data_dir = "C:\\Users\\CAyre\\Documents\\Coding\\deepdream-analysis\\pre-study\\ECG\\ACQ") {
  # Get list of all subject files (e.g., ID01_baseline.csv, ID02_baseline.csv)
  file_list <- list.files(
    path = here(data_dir),
    pattern = "ID\\d+_baseline\\.csv",
    full.names = TRUE
  )

  # Read and combine all files
  data <- map_df(file_list, ~ {
    read_delim(.x, 
              delim = ";",
              col_types = cols(.default = col_character()),
              show_col_types = FALSE) %>%
      mutate(Id = as.integer(str_extract(basename(.x), "\\d+"))) %>%  # Extract ID
      type_convert()  # Automatically convert columns to appropriate types
  })

  # Arrange by subject ID
  data <- data %>% arrange(Id)

  return(data)
}

process_all_subjects <- function(raw_data) {
  # Clean column names
  raw_data <- raw_data %>%
    #rename_all(~str_trim(.) %>% make.names()) %>%  # Force valid column names
    rename(Id = matches("^id$", ignore.case = TRUE)) %>%  # Explicitly rename ID column
    mutate(Id = as.integer(Id))
  
  # Verify Id column exists
  if (!"Id" %in% colnames(raw_data)) {
    stop("Id column still missing after renaming. Columns found: ",
         paste(colnames(raw_data), collapse = ", "))
  }
  
  # Process using split-apply-combine
  raw_data %>%
    split(.$Id) %>%  # Base R splitting by Id
    map_dfr(~ {
      .x %>%
        add_time_column() %>%
        remove_zero_heart_rate() %>%
        clean_repeated_events() %>%
        mutate(Id = first(Id))  # Explicitly maintain Id column
    }) %>%
    arrange(Id)
}


# Execution
data_exp <- import_subjects_exp()
data_base <- import_subjects_base()

data_exp_processed <- process_all_subjects(data_exp)
data_base_processed <- process_all_subjects(data_base)
data_exp_processed <- clean_consecutive_rr(data_exp_processed)
data_base_processed <- clean_consecutive_rr(data_base_processed)

Pulling out Condition Orders for each Subject

# Define file paths
file_paths <- list.files(path = "pre-study", pattern = "_QuestionsData.csv", full.names = TRUE)

# Import CSV files and combine them into a single data frame
questions_combined <- bind_rows(lapply(file_paths, read_csv2))

condensed_questions_combined <- questions_combined %>%
  select(Id, VideoName, VideoMode, QuestionIndex, QuestionAnswer) %>%
  mutate(
    Condition = as.factor(VideoMode),
    VideoMode = case_when(
      grepl("Main_Stereo", VideoName) ~ "Stereo",
      grepl("Main_Mono", VideoName) ~ "Mono",
      TRUE ~ as.character(VideoMode)
    ),
    VideoName = as.factor(VideoName),
    QuestionIndex = as.factor(QuestionIndex)
  )

# Table which links condition to section number and ID
processed_questions_combined <- condensed_questions_combined %>%
  group_by(Id) %>%
  arrange(Id, row_number()) %>%
  mutate(Order = paste(VideoMode, Condition, sep = "_")) %>%
  summarise(Condition_Order = list(unique(Order)), .groups = "drop") %>%
  unnest_longer(Condition_Order) %>%
  group_by(Id) %>%
  mutate(Order_Number = row_number()) %>%
  ungroup()

# Calculate ASC_Score for each (Id, VideoMode, VideoName)
asc_scores <- condensed_questions_combined %>%
  group_by(Id, VideoMode, VideoName) %>%
  mutate(ASC_Score = mean(QuestionAnswer[QuestionIndex %in% 4:13], na.rm = TRUE)) %>%
  summarise(
    ASC_Score = mean(QuestionAnswer[((QuestionIndex)) %in% 4:13], na.rm = TRUE),
    VideoName = case_when(
      grepl("Hallucinations", VideoName) ~ "Hallucinations",
      grepl("Normal", VideoName) ~ "Normal",
      TRUE ~ as.character(VideoName)
    ),
    .groups = "drop"
  ) %>%
  mutate(Condition_Order = paste(VideoMode, VideoName, sep = "_")) # Create Condition_Order to match

# Combining ASC Scores with Condition_Order
processed_questions_asc <- merge(processed_questions_combined, asc_scores,
                   by.x = c("Id", "Condition_Order"), 
                   all.x = TRUE)

Investigating Events

plot_event_by_frame <- function(subject_id, data = data_exp_processed) {
  # Filter data for the given Id and remove EVENT = 0
  subject_data <- data %>% 
    filter(Id == subject_id, EVENT != 0)
  
  # Create the plot
  ggplot(subject_data, aes(x = seq_along(EVENT), y = EVENT)) +
    geom_point(color = "red", size = 1, alpha = 0.7) +  # Add points
    labs(
      title = paste("EVENT by Frame for Id =", subject_id),
      x = "Frame (Index)",
      y = "EVENT"
    ) +
    theme_minimal()
}

# Execution
map(1, plot_event_by_frame)  # Only plotting the first subject
## [[1]]

Looking Between Events 4 and 8 for Experimental Data

There are four sections of data per subject that occur between an event 4 and event 8 marker. Ensure there are no extraneous 4 events before proceeding with analysis

# In the experimental data, Subject 2, there is a random event 4 at the start of the data which does not correlate to an event 8 (the next event 8 happens ~1000 seconds later). I'm filtering this out.
data_exp_processed <- data_exp_processed %>%
  mutate(
    EVENT = if_else(
      Id == 2 & Time == 1.531,  # Match time with tolerance for precision
      0,  # Replace EVENT with 0
      EVENT  # Keep original value otherwise
    )
  )

# Select only points between events 4 and 8
filter_between_events <- function(data, start_event = 4, end_event = 8) {
  data %>%
    group_by(Id) %>%
    mutate(
      start_flag = EVENT == start_event,  # Mark start points
      end_flag = EVENT == end_event       # Mark end points
    ) %>%
    mutate(
      section_id = cumsum(start_flag),  # Track sections where EVENT == 4 starts
      valid_section = section_id > lag(cumsum(end_flag), default = 0)  # Ensure pairing
    ) %>%
    filter(valid_section) %>%
    select(-start_flag, -end_flag, -valid_section) %>%
    ungroup()
}

# Filter out points between events 4 and 8
filtered_exp <- filter_between_events(data_exp_processed)


# Calculating Mean Heart Rate and Heart Rate Variability in each section (between Event = 4 and Event = 8) for each subject
results <- filtered_exp %>%
  filter(Id %in% 1:14, section_id %in% 1:4) %>%
  group_by(Id, section_id) %>%
  arrange(Time) %>%
  summarise(
    mean_heart_rate = mean(`Heart Rate`, na.rm = TRUE),
    
    # Improved HRV calculation
    heart_rate_variability = {
      clean_rr <- na.omit(`Clean R-R`)
      if (length(clean_rr) >= 2) {
        sqrt(mean(diff(clean_rr)^2))  # RMSSD from consecutive non-NA values
      } else {
        NA_real_
      }
    },
    .groups = "drop"
  ) %>%
  mutate(
    mean_heart_rate = ifelse(is.nan(mean_heart_rate), NA, mean_heart_rate)
  )
# View results sorted by Id and section
results %>% arrange(Id, section_id)
## # A tibble: 56 × 4
##       Id section_id mean_heart_rate heart_rate_variability
##    <int>      <int>           <dbl>                  <dbl>
##  1     1          1            90.6                 0.0224
##  2     1          2            89.9                 0.0607
##  3     1          3            88.6                 0.0237
##  4     1          4            91.5                 0.0376
##  5     2          1            94.5                 0.0265
##  6     2          2            96.0                 0.0301
##  7     2          3            99.0                 0.0239
##  8     2          4            96.9                 0.0745
##  9     3          1            98.0                 0.0518
## 10     3          2            97.1                 0.0151
## # ℹ 46 more rows
# Join results table with the condensed data to include condition names
linked_results <- results %>%
  left_join(processed_questions_combined, by = c("Id" = "Id", "section_id" = "Order_Number"))

# Uncomment below to see the 4 & 8 events picked out for a given subject Id
#data_event <- data_exp_processed %>%
#  filter(Id == 2 & EVENT %in% c(4, 8)) 
#data_event

Paired T-Test for Mean Heart Rate

# Function to run paired t-tests for all combinations of conditions
run_all_paired_ttests <- function(data) {
  # Check required columns exist
  required_cols <- c("Id", "Condition_Order", "mean_heart_rate", "heart_rate_variability")
  if (!all(required_cols %in% colnames(data))) {
    stop("Missing required columns. Needed: ", paste(required_cols, collapse = ", "))
  }

  # Create all possible condition pairs
  conditions <- unique(data$Condition_Order)
  pairs <- as.data.frame(t(combn(conditions, 2))) %>%
    setNames(c("condition1", "condition2"))

  # Process both metrics
  purrr::map_df(c("mean_heart_rate", "heart_rate_variability"), function(metric) {
    purrr::map_df(1:nrow(pairs), function(i) {
      pair <- pairs[i, ]

      # Prepare paired data for current metric
      paired_data <- data %>%
        filter(Condition_Order %in% c(pair$condition1, pair$condition2)) %>%
        pivot_wider(
          id_cols = Id,
          names_from = Condition_Order,
          values_from = all_of(metric),
          names_prefix = "condition_"
        ) %>%
        na.omit()

      # Skip if insufficient data
      if (nrow(paired_data) < 2) {
        return(tibble(
          metric = metric,
          comparison = paste(pair$condition1, "vs", pair$condition2),
          message = "Insufficient data (n < 2)"
        ))
      }

      # Perform t-test
      t.test(
        paired_data[[paste0("condition_", pair$condition1)]],
        paired_data[[paste0("condition_", pair$condition2)]],
        paired = TRUE
      ) %>%
        broom::tidy() %>%
        mutate(
          metric = metric,
          comparison = paste(pair$condition1, "vs", pair$condition2),
          n_pairs = nrow(paired_data),
          .before = 1
        )
    })
  })
}

# Execute
paired_ttest_results <- run_all_paired_ttests(linked_results)
print(paired_ttest_results)
## # A tibble: 12 × 11
##    metric       comparison n_pairs estimate statistic p.value parameter conf.low
##    <chr>        <chr>        <int>    <dbl>     <dbl>   <dbl>     <dbl>    <dbl>
##  1 mean_heart_… Stereo_Ha…      14  0.218      0.272   0.790         13 -1.52   
##  2 mean_heart_… Stereo_Ha…      14  0.124      0.155   0.879         13 -1.60   
##  3 mean_heart_… Stereo_Ha…      14  0.197      0.242   0.812         13 -1.56   
##  4 mean_heart_… Mono_Norm…      14 -0.0947    -0.142   0.889         13 -1.53   
##  5 mean_heart_… Mono_Norm…      14 -0.0213    -0.0276  0.978         13 -1.69   
##  6 mean_heart_… Mono_Hall…      14  0.0734     0.0965  0.925         13 -1.57   
##  7 heart_rate_… Stereo_Ha…      14 -0.0160    -1.92    0.0774        13 -0.0340 
##  8 heart_rate_… Stereo_Ha…      14 -0.00603   -0.838   0.417         13 -0.0216 
##  9 heart_rate_… Stereo_Ha…      14 -0.00419   -0.674   0.512         13 -0.0176 
## 10 heart_rate_… Mono_Norm…      14  0.00995    1.07    0.302         13 -0.0101 
## 11 heart_rate_… Mono_Norm…      14  0.0118     1.32    0.209         13 -0.00746
## 12 heart_rate_… Mono_Hall…      14  0.00184    0.236   0.817         13 -0.0150 
## # ℹ 3 more variables: conf.high <dbl>, method <chr>, alternative <chr>

Plotting Paired T-test Results (Commented out)

plot_individual_paired_comparisons <- function(data, ttest_results, metric = "mean_heart_rate") {
  # Filter results for specified metric
  metric_results <- ttest_results %>%
    filter(metric == !!metric) %>%
    mutate(
      condition1 = sub(" vs.*", "", comparison),
      condition2 = sub(".*vs ", "", comparison)
    )
  
  # Create list of individual plots
  plots <- map(1:nrow(metric_results), function(i) {
    pair <- metric_results[i, ]
    
    # Prepare paired data
    pair_data <- data %>%
      filter(Condition_Order %in% c(pair$condition1, pair$condition2)) %>%
      select(Id, Condition_Order, value = all_of(metric)) %>%
      pivot_wider(
        names_from = Condition_Order,
        values_from = value,
        names_prefix = "Condition_"
      ) %>%
      na.omit()
    
    # Convert to long format for plotting
    plot_data <- pair_data %>%
      pivot_longer(
        cols = -Id,
        names_to = "Condition",
        values_to = "value"
      ) %>%
      mutate(Condition = factor(sub("Condition_", "", Condition)))
    
    # Create plot
    ggplot(plot_data, aes(x = Condition, y = value)) +
      geom_boxplot(aes(fill = Condition), width = 0.3, outlier.shape = NA, alpha = 0.8) +
      geom_point(color = "gray40", size = 2.5, alpha = 0.7) +
      geom_line(aes(group = Id), color = "gray60", alpha = 0.5) +
      scale_fill_brewer(palette = "Set2") +  # ColorBrewer qualitative palette
      labs(
        title = paste("Comparison:", pair$comparison),
        subtitle = paste0("Paired t-test: p = ", round(pair$p.value, 3)),
        x = "Condition",
        y = ifelse(metric == "mean_heart_rate",
                   "Mean Heart Rate (bpm)", 
                   "Heart Rate Variability (RMSSD)"),
        fill = "Condition"
      ) +
      theme_minimal(base_size = 14) +
      theme(
        legend.position = "right",
        panel.grid.major.x = element_blank(),
        plot.title = element_text(face = "bold"),
        plot.subtitle = element_text(color = "grey40")
      )
  })
  
  # Name plots by comparison
  names(plots) <- metric_results$comparison
  return(plots)
}

# Generate individual plots for mean heart rate
meanhr_plots_ttest <- plot_individual_paired_comparisons(
  data = linked_results,
  ttest_results = paired_ttest_results,
  metric = "mean_heart_rate"
)

# Generate individual plots for heart rate variability
hrv_plots_ttest <- plot_individual_paired_comparisons(
  data = linked_results,
  ttest_results = paired_ttest_results,
  metric = "heart_rate_variability"
)

# Print all plots (commented out)
# walk(meanhr_plots_ttest, print)
# walk(hrv_plots_ttest, print)

Plot HR and R-R For All Subjects/Conditions

# Join the mapping with filtered_exp to sync conditions
filtered_exp_synced <- filtered_exp %>%
  left_join(processed_questions_combined, by = c("Id" = "Id", "section_id" = "Order_Number")) %>%
  filter(!is.na(`Clean R-R`))  # Remove NA values

# Plotting function
create_subject_plot_rr <- function(subject_data) {
  ggplot(subject_data, aes(x = Time, y = `Clean R-R`)) +
    geom_line(color = "#2c7bb6", linewidth = 0.4) +
    facet_wrap(~ Condition_Order, nrow = 2, ncol = 2, scales = "free_x") +
    labs(
      title = paste("Subject", unique(subject_data$Id)),
      x = "Time (s)",
      y = "R-R Interval (s)"
    ) +
    theme_minimal() +
    theme(
      strip.background = element_rect(fill = "#f5f5f5"),
      strip.text = element_text(size = 8, face = "bold"),
      axis.text = element_text(size = 6),
      plot.title = element_text(size = 10, hjust = 0.5),
      panel.spacing = unit(0.3, "cm")
    )
}

create_subject_plot_hr <- function(subject_data) {
  ggplot(subject_data, aes(x = Time, y = `Heart Rate`)) +
    geom_line(color = "#2c7bb6", linewidth = 0.4) +
    facet_wrap(~ Condition_Order, nrow = 2, ncol = 2, scales = "free_x") +
    labs(
      title = paste("Subject", unique(subject_data$Id)),
      x = "Time (s)",
      y = "Heart Rate (bpm)"
    ) +
    theme_minimal() +
    theme(
      strip.background = element_rect(fill = "#f5f5f5"),
      strip.text = element_text(size = 8, face = "bold"),
      axis.text = element_text(size = 6),
      plot.title = element_text(size = 10, hjust = 0.5),
      panel.spacing = unit(0.3, "cm")
    )
}

# Split data by subject and create plots
hrv_subject_plots <- filtered_exp_synced %>%
  group_split(Id) %>%
  map(create_subject_plot_rr)

hr_subject_plots <- filtered_exp_synced %>%
  group_split(Id) %>%
  map(create_subject_plot_hr)

# View all plots
walk(hr_subject_plots, print) 

walk(hrv_subject_plots, print) 

## Identifying Outliers for HR and HRV

# Old function which used MAD Threshold to identify outliers
#analyze_metric_outliers <- function(data, metric_col, threshold = 3.5) {
#  data %>%
#    group_by(Id, Condition_Order) %>%
#    mutate(
#      median_val = median(!!sym(metric_col), na.rm = TRUE),
#      mad_threshold = threshold * mad(!!sym(metric_col), na.rm = TRUE),
#      is_outlier = abs(!!sym(metric_col) - median_val) / mad(!!sym(metric_col), na.rm = TRUE) > threshold
#    ) %>%
#    ungroup()
#}

# 40% average threshold to identify outliers
analyze_metric_outliers <- function(data, metric_col) {
  data %>%
    group_by(Id, Condition_Order) %>%
    mutate(
      avg_val = mean(!!sym(metric_col), na.rm = TRUE),
      lower_threshold = 0.6 * avg_val,  # 40% below average
      upper_threshold = 1.4 * avg_val,  # 40% above average
      is_outlier = (!!sym(metric_col) < lower_threshold) | (!!sym(metric_col) > upper_threshold)
    ) %>%
    ungroup()
}

# Process both metrics
filtered_exp_annotated <- filtered_exp_synced %>%
  analyze_metric_outliers("Clean R-R") %>%  # For HRV (R-R)
  rename(
    avg_rr = avg_val,
    lower_threshold_rr = lower_threshold,
    upper_threshold_rr = upper_threshold,
    is_outlier_rr = is_outlier
  ) %>%
  analyze_metric_outliers("Heart Rate") %>%  # For HR
  rename(
    avg_hr = avg_val,
    lower_threshold_hr = lower_threshold,
    upper_threshold_hr = upper_threshold,
    is_outlier_hr = is_outlier
  )

create_metric_plot <- function(subject_data, metric, y_label) {
  metric_sym <- sym(metric)
  prefix <- ifelse(metric == "Clean R-R", "rr", "hr")
  
  outlier_counts <- subject_data %>%
    group_by(Condition_Order) %>%
    summarise(outlier_count = sum(!!sym(paste0("is_outlier_", prefix)), na.rm = TRUE), .groups = "drop")
  
  ggplot(subject_data, aes(x = Time, y = !!metric_sym)) +
    geom_line(color = "gray60", linewidth = 0.3) +
    geom_point(
      data = ~ filter(.x, !!sym(paste0("is_outlier_", prefix))), 
      color = "red", size = 0.8, alpha = 0.7
    ) +
    geom_hline(
      aes(yintercept = !!sym(paste0("avg_", prefix))), 
      color = "blue", linetype = "dashed", linewidth = 0.3
    ) +
    geom_hline(
      aes(yintercept = !!sym(paste0("upper_threshold_", prefix))), 
      color = "darkgreen", linetype = "dotted", linewidth = 0.3
    ) +
    geom_hline(
      aes(yintercept = !!sym(paste0("lower_threshold_", prefix))), 
      color = "darkgreen", linetype = "dotted", linewidth = 0.3
    ) +
    geom_text(
      data = outlier_counts,
      aes(x = -Inf, y = Inf, label = paste("Outliers:", outlier_count)),
      hjust = -0.1, 
      vjust = 1.5, 
      size = 2.5, 
      color = "red"
    ) +
    facet_wrap(~ Condition_Order, nrow = 2, ncol = 2, scales = "free_x") +
    labs(
      title = paste("Subject", unique(subject_data$Id), "-", y_label),
      x = "Time (s)",
      y = y_label,
      caption = "Blue dashed: Average | Green dotted: 40% Thresholds"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 10, hjust = 0.5),
      strip.text = element_text(size = 8),
      axis.text = element_text(size = 6)
    )
}

# Create and save plots for both metrics
outlier_subject_plots_hrv <- filtered_exp_annotated %>%
  group_split(Id) %>%
  map(~ create_metric_plot(.x, "Clean R-R", "R-R Interval (ms)"))

outlier_subject_plots_hr <- filtered_exp_annotated %>%
  group_split(Id) %>%
  map(~ create_metric_plot(.x, "Heart Rate", "Heart Rate (bpm)"))

# Print HR plots 
walk(outlier_subject_plots_hr, print)

# Print HRV plots 
walk(outlier_subject_plots_hrv, print)

# Combined summary
outlier_summary <- filtered_exp_annotated %>%
  group_by(Id, Condition_Order) %>%
  summarise(
    total_points = n(),
    hr_outliers = sum(is_outlier_hr, na.rm = TRUE),
    hrv_outliers = sum(is_outlier_rr, na.rm = TRUE),
    .groups = "drop"
  )

# View combined outliers results
print(outlier_summary)
## # A tibble: 56 × 5
##       Id Condition_Order       total_points hr_outliers hrv_outliers
##    <dbl> <chr>                        <int>       <int>        <int>
##  1     1 Mono_Hallucinations            351           0            0
##  2     1 Mono_Normal                    355           2            2
##  3     1 Stereo_Hallucinations          364           0            1
##  4     1 Stereo_Normal                  363           1            0
##  5     2 Mono_Hallucinations            396           0            0
##  6     2 Mono_Normal                    383           0            1
##  7     2 Stereo_Hallucinations          374           0            0
##  8     2 Stereo_Normal                  382           2            2
##  9     3 Mono_Hallucinations            384           0            0
## 10     3 Mono_Normal                    387           3            2
## # ℹ 46 more rows

Re-plotting HR and HRV without Outliers (Commented out)

# Process data with outlier replacement for both metrics
filtered_exp_cleaned <- filtered_exp_synced %>%
  group_by(Id, Condition_Order) %>%
  mutate(
    # HRV (R-R) calculations
    avg_rr = mean(`Clean R-R`, na.rm = TRUE),
    lower_threshold_rr = 0.6 * avg_rr,
    upper_threshold_rr = 1.4 * avg_rr,
    is_outlier_rr = (`Clean R-R` < lower_threshold_rr) | (`Clean R-R` > upper_threshold_rr),
    Clean_RR_Filtered = ifelse(is_outlier_rr, NA, `Clean R-R`),
    
    # HR calculations
    avg_hr = mean(`Heart Rate`, na.rm = TRUE),
    lower_threshold_hr = 0.6 * avg_hr,
    upper_threshold_hr = 1.4 * avg_hr,
    is_outlier_hr = (`Heart Rate` < lower_threshold_hr) | (`Heart Rate` > upper_threshold_hr),
    Heart_Rate_Filtered = ifelse(is_outlier_hr, NA, `Heart Rate`)
  ) %>%
  ungroup()


create_subject_plot <- function(subject_data, metric = c("HRV", "HR")) {
  metric <- match.arg(metric)
  
  if (metric == "HRV") {
    y_var <- sym("Clean_RR_Filtered")
    avg_var <- "avg_rr"
    lower_threshold_var <- "lower_threshold_rr"
    upper_threshold_var <- "upper_threshold_rr"
    y_label <- "Filtered R-R Interval (ms)"
    outlier_var <- "is_outlier_rr"
  } else {
    y_var <- sym("Heart_Rate_Filtered")
    avg_var <- "avg_hr"
    lower_threshold_var <- "lower_threshold_hr"
    upper_threshold_var <- "upper_threshold_hr"
    y_label <- "Filtered Heart Rate (bpm)"
    outlier_var <- "is_outlier_hr"
  }
  
  # Prepare threshold and outlier data
  threshold_data <- subject_data %>%
    group_by(Condition_Order) %>%
    summarise(
      avg_val = first(!!sym(avg_var)),
      upper_threshold = first(!!sym(upper_threshold_var)),
      lower_threshold = first(!!sym(lower_threshold_var)),
      .groups = "drop"
    )
  
  outlier_counts <- subject_data %>%
    group_by(Condition_Order) %>%
    summarise(outlier_count = sum(!!sym(outlier_var), na.rm = TRUE), .groups = "drop")
  
  # Create the plot
  ggplot(subject_data, aes(x = Time, y = !!y_var)) +
    geom_line(color = "#1f77b4", linewidth = 0.4) +
    geom_hline(
      data = threshold_data,
      aes(yintercept = avg_val),
      color = "blue", 
      linetype = "dashed", 
      linewidth = 0.3
    ) +
    geom_hline(
      data = threshold_data,
      aes(yintercept = upper_threshold),
      color = "darkgreen", 
      linetype = "dotted", 
      linewidth = 0.3
    ) +
    geom_hline(
      data = threshold_data,
      aes(yintercept = lower_threshold),
      color = "darkgreen", 
      linetype = "dotted", 
      linewidth = 0.3
    ) +
    geom_text(
      data = outlier_counts,
      aes(x = Inf, y = Inf, label = paste("Outliers Removed:", outlier_count)),
      hjust = 1.1, 
      vjust = 1.5, 
      size = 3, 
      color = "red"
    ) +
    facet_wrap(~ Condition_Order, nrow = 2, ncol = 2, scales = "free_x") +
    labs(
      title = paste("Subject", unique(subject_data$Id), "-", metric),
      x = "Time (s)",
      y = y_label,
      caption = "Blue dashed: Average | Green dotted: 50% Thresholds"
    ) +
    theme_minimal()
}


# Generate and view plots for both metrics
hr_plots_no_outlier <- filtered_exp_cleaned %>%
  group_split(Id) %>%
  map(~ create_subject_plot(.x, "HR"))

hrv_plots_no_outlier <- filtered_exp_cleaned %>%
  group_split(Id) %>%
  map(~ create_subject_plot(.x, "HRV"))

# Print HR plots (Uncomment to print)
#walk(hr_plots_no_outlier, print)

# Print HRV plots (Uncomment to print)
#walk(hrv_plots_no_outlier, print)

# View outlier summary table (Uncomment to print)
#print(outlier_summary)

Re-doing t-tests without Outliers

# Calculate metrics using FILTERED DATA
results_filtered <- filtered_exp_cleaned %>%
  filter(Id %in% 1:14, section_id %in% 1:4) %>%
  group_by(Id, section_id) %>%
  arrange(Time) %>%
  summarise(
    # Use filtered heart rate (outliers replaced with NA)
    mean_heart_rate = mean(Heart_Rate_Filtered, na.rm = TRUE),
    
    # HRV calculation using filtered RR intervals
    heart_rate_variability = {
      clean_rr <- na.omit(Clean_RR_Filtered)
      if (length(clean_rr) >= 2) {
        sqrt(mean(diff(clean_rr)^2))  # RMSSD calculation
      } else {
        NA_real_
      }
    },
    .groups = "drop"
  ) %>%
  mutate(
    mean_heart_rate = ifelse(is.nan(mean_heart_rate), NA, mean_heart_rate)
  )

# View results sorted by Id and section
results_filtered %>% arrange(Id, section_id)
## # A tibble: 56 × 4
##       Id section_id mean_heart_rate heart_rate_variability
##    <dbl>      <int>           <dbl>                  <dbl>
##  1     1          1            91.0                 0.0207
##  2     1          2            90.2                 0.0269
##  3     1          3            89.1                 0.0237
##  4     1          4            92.0                 0.0376
##  5     2          1            94.9                 0.0265
##  6     2          2            96.6                 0.0268
##  7     2          3            99.4                 0.0239
##  8     2          4            97.5                 0.0243
##  9     3          1            98.0                 0.0250
## 10     3          2            97.5                 0.0151
## # ℹ 46 more rows
# Join results table with the condensed data to include condition names
linked_results_filtered <- results_filtered %>%
  left_join(processed_questions_combined, by = c("Id" = "Id", "section_id" = "Order_Number"))

paired_ttest_results_filtered <- run_all_paired_ttests(linked_results_filtered)

# Generate individual plots for heart rate variability (no outliers)
ttest_hrv_plots_no_outliers <- plot_individual_paired_comparisons(
  data = linked_results_filtered,
  ttest_results = paired_ttest_results_filtered,
  metric = "heart_rate_variability"
)

# Generate individual plots for heart rate variability (no outliers)
ttest_meanhr_plots_no_outlier <- plot_individual_paired_comparisons(
  data = linked_results_filtered,
  ttest_results = paired_ttest_results_filtered,
  metric = "mean_heart_rate"
)

# Print all plots
walk(ttest_meanhr_plots_no_outlier, print)

walk(ttest_hrv_plots_no_outliers, print)

# Uncomment to Create PDF report
# pdf("HR_HRV_Report.pdf", width = 11, height = 8.5)  # Letter size landscape
# HR plots
# walk(hr_subject_plots, print)
# walk(meanhr_plots_ttest, print)
# walk(outlier_subject_plots_hr, print)
# walk(hr_plots_no_outlier, print)
# walk(ttest_meanhr_plots_no_outlier, print)
# HRV plots
# walk(hrv_subject_plots, print)
# walk(hrv_plots_ttest, print)
# walk(outlier_subject_plots_hrv, print)
# walk(hrv_plots_no_outlier, print)
# walk(ttest_hrv_plots_no_outliers, print)
# Close PDF device
# dev.off()

paired_ttest_results_filtered
## # A tibble: 12 × 11
##    metric       comparison n_pairs estimate statistic p.value parameter conf.low
##    <chr>        <chr>        <int>    <dbl>     <dbl>   <dbl>     <dbl>    <dbl>
##  1 mean_heart_… Stereo_Ha…      14  2.30e-1    0.300    0.769        13 -1.42   
##  2 mean_heart_… Stereo_Ha…      14  1.24e-1    0.154    0.880        13 -1.62   
##  3 mean_heart_… Stereo_Ha…      14  2.85e-1    0.349    0.733        13 -1.48   
##  4 mean_heart_… Mono_Norm…      14 -1.06e-1   -0.167    0.870        13 -1.47   
##  5 mean_heart_… Mono_Norm…      14  5.51e-2    0.0714   0.944        13 -1.61   
##  6 mean_heart_… Mono_Hall…      14  1.61e-1    0.220    0.830        13 -1.42   
##  7 heart_rate_… Stereo_Ha…      14 -1.54e-3   -1.12     0.281        13 -0.00449
##  8 heart_rate_… Stereo_Ha…      14 -6.88e-4   -0.402    0.694        13 -0.00438
##  9 heart_rate_… Stereo_Ha…      14 -7.18e-4   -0.284    0.781        13 -0.00618
## 10 heart_rate_… Mono_Norm…      14  8.49e-4    0.537    0.600        13 -0.00257
## 11 heart_rate_… Mono_Norm…      14  8.20e-4    0.389    0.703        13 -0.00373
## 12 heart_rate_… Mono_Hall…      14 -2.97e-5   -0.0111   0.991        13 -0.00583
## # ℹ 3 more variables: conf.high <dbl>, method <chr>, alternative <chr>

Plotting Standardized HR and HRV (Outliers Removed)

filtered_exp_cleaned <- filtered_exp_cleaned %>%
  group_by(Id) %>%
  mutate(
    # Calculate mean and standard deviation for Clean R-R
    mean_rr = mean(`Clean_RR_Filtered`, na.rm = TRUE),
    sd_rr = sd(`Clean_RR_Filtered`, na.rm = TRUE),
    
    # Standardized Clean R-R
    Clean_RR_Standardized = (`Clean_RR_Filtered` - mean_rr) / sd_rr,
    
    # Calculate mean and standard deviation for Heart Rate
    mean_hr = mean(`Heart_Rate_Filtered`, na.rm = TRUE),
    sd_hr = sd(`Heart_Rate_Filtered`, na.rm = TRUE),
    
    # Standardized Heart Rate
    Heart_Rate_Standardized = (`Heart_Rate_Filtered` - mean_hr) / sd_hr
  ) %>%
  ungroup()

create_subject_plot_standardized <- function(subject_data, metric = c("HRV", "HR")) {
  metric <- match.arg(metric)
  if (metric == "HRV") {
    y_var <- sym("Clean_RR_Standardized")
    y_label <- "Standardized R-R Interval"
  } else {
    y_var <- sym("Heart_Rate_Standardized")
    y_label <- "Standardized Heart Rate"
  }

  # Create the plot
  ggplot(subject_data, aes(x = Time, y = !!y_var)) +
    geom_line(color = "#1f77b4", linewidth = 0.4) +
    facet_wrap(~ Condition_Order, nrow = 2, ncol = 2, scales = "free_x") +
    labs(
      title = paste("Subject", unique(subject_data$Id), "- Standardized ", metric),
      x = "Time (s)",
      y = y_label,
    ) +
    theme_minimal()
}

# Generate and view plots for both metrics
hr_plots_standardized <- filtered_exp_cleaned %>%
  group_split(Id) %>%
  map(~ create_subject_plot_standardized(.x, "HR"))

hrv_plots_standardized <- filtered_exp_cleaned %>%
  group_split(Id) %>%
  map(~ create_subject_plot_standardized(.x, "HRV"))

# Print HR plots
walk(hr_plots_standardized, print)

# Print HRV plots
walk(hrv_plots_standardized, print)

Plotting Repeated Measures Correlation - ASC Score vs. HRV

# Add Mean HR and HRV to ASC Scores 
processed_questions_merged <- merge(processed_questions_asc, linked_results_filtered,
                   by.x = c("Id", "Condition_Order"), 
                   all.x = TRUE)

# Remove duplicate rows
pqm <- processed_questions_merged %>% distinct()

# Combine Mono and Stereo data
# Group by Id and VideoName to calculate mean ASC_Score and heart_rate_variability
pqm_combined <- pqm %>%
  group_by(Id, VideoName) %>%
  summarise(
    ASC_Score = mean(ASC_Score, na.rm = TRUE),
    heart_rate_variability = mean(heart_rate_variability, na.rm = TRUE)
  ) %>%
  ungroup()

# ASC vs HRV (Mono and Stereo ave)
ggplot(pqm_combined, aes(x = heart_rate_variability, y = ASC_Score, color = as.factor(Id), shape = VideoName)) +
  geom_point(size = 2) +  # Scatterplot points
  geom_smooth(aes(group = Id), method = "lm", se = FALSE, linetype = "solid") + 
  labs(
    x = "Heart Rate Variability",
    y = "ASC Score",
    color = "Subject ID",
    shape = "Video Type"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# ASC vs HRV 
ggplot(pqm, aes(x = heart_rate_variability, y = ASC_Score, color = as.factor(Id), shape = VideoName)) +
  geom_point(size = 2) +  # Scatterplot points
  geom_smooth(aes(group = Id), method = "lm", se = FALSE, linetype = "solid") + 
  labs(
    x = "Heart Rate Variability (s)",
    y = "ASC Score",
    color = "Subject ID",
    shape = "Video Type"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Compute repeated measures correlation using combined mono/stereo scores
rmcorr_result_combined <- rmcorr(participant = Id, measure1 = ASC_Score, measure2 = heart_rate_variability, dataset = pqm_combined)

# View the results
print(rmcorr_result_combined)
## 
## Repeated measures correlation
## 
## r
## -0.03873742
## 
## degrees of freedom
## 13
## 
## p-value
## 0.8909804
## 
## 95% confidence interval
## -0.5402791 0.4831122
# Compute repeated measures correlation without combined mono/stereo scores (4 points per subject)
rmcorr_result <- rmcorr(participant = Id, measure1 = ASC_Score, measure2 = heart_rate_variability, dataset = pqm)

# View the results
print(rmcorr_result)
## 
## Repeated measures correlation
## 
## r
## -0.09878415
## 
## degrees of freedom
## 41
## 
## p-value
## 0.5285458
## 
## 95% confidence interval
## -0.3876274 0.2077227